<!DOCTYPE html>
<html lang="en-us">
  <head>

    <meta charset="utf-8">
<title>Custom Analyzers | Elasticsearch: The Definitive Guide [2.x] | Elastic</title>
<link rel="home" href="index.html" title="Elasticsearch: The Definitive Guide [2.x]">
<link rel="up" href="index-management.html" title="Index Management">
<link rel="prev" href="configuring-analyzers.html" title="Configuring Analyzers">
<link rel="next" href="mapping.html" title="Types and Mappings">
<meta name="DC.type" content="Learn/Docs/Legacy/Elasticsearch/Definitive Guide/2.x">
<meta name="DC.subject" content="Elasticsearch">
<meta name="DC.identifier" content="2.x">
<meta name="robots" content="noindex,nofollow">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <script src="https://cdn.optimizely.com/js/18132920325.js"></script>
    <link rel="apple-touch-icon" sizes="57x57" href="/apple-icon-57x57.png">
    <link rel="apple-touch-icon" sizes="60x60" href="/apple-icon-60x60.png">
    <link rel="apple-touch-icon" sizes="72x72" href="/apple-icon-72x72.png">
    <link rel="apple-touch-icon" sizes="76x76" href="/apple-icon-76x76.png">
    <link rel="apple-touch-icon" sizes="114x114" href="/apple-icon-114x114.png">
    <link rel="apple-touch-icon" sizes="120x120" href="/apple-icon-120x120.png">
    <link rel="apple-touch-icon" sizes="144x144" href="/apple-icon-144x144.png">
    <link rel="apple-touch-icon" sizes="152x152" href="/apple-icon-152x152.png">
    <link rel="apple-touch-icon" sizes="180x180" href="/apple-icon-180x180.png">
    <link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
    <link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
    <link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
    <link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
    <link rel="manifest" href="/manifest.json">
    <meta name="apple-mobile-web-app-title" content="Elastic">
    <meta name="application-name" content="Elastic">
    <meta name="msapplication-TileColor" content="#ffffff">
    <meta name="msapplication-TileImage" content="/mstile-144x144.png">
    <meta name="theme-color" content="#ffffff">
    <meta name="naver-site-verification" content="936882c1853b701b3cef3721758d80535413dbfd">
    <meta name="yandex-verification" content="d8a47e95d0972434">
    <meta name="localized" content="true">
    <meta name="st:robots" content="follow,index">
    <meta property="og:image" content="https://www.elastic.co/static/images/elastic-logo-200.png">
    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
    <link rel="icon" href="/favicon.ico" type="image/x-icon">
    <link rel="apple-touch-icon-precomposed" sizes="64x64" href="/favicon_64x64_16bit.png">
    <link rel="apple-touch-icon-precomposed" sizes="32x32" href="/favicon_32x32.png">
    <link rel="apple-touch-icon-precomposed" sizes="16x16" href="/favicon_16x16.png">
    <!-- Give IE8 a fighting chance -->
    <!--[if lt IE 9]>
    <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
    <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
    <![endif]-->
    <link rel="stylesheet" type="text/css" href="/guide/static/styles.css">
  </head>

  <!--© 2015-2021 Elasticsearch B.V. Copying, publishing and/or distributing without written permission is strictly prohibited.-->

  <body>
    <!-- Google Tag Manager -->
    <!-- Initialise the global dataLayer array used by the snippets below. -->
    <script>dataLayer = [];</script>
    <!-- No-JavaScript fallback: a hidden iframe for container GTM-58RLH5. -->
    <noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-58RLH5" title="Google Tag Manager" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
    <script>
      (function (w, d, s, l, i) {
        // Make sure the data layer exists, then record the start timestamp as its first event.
        w[l] = w[l] || [];
        w[l].push({'gtm.start': new Date().getTime(), event: 'gtm.js'});
        var f = d.getElementsByTagName(s)[0],
            j = d.createElement(s),
            // The layer name is only passed to gtm.js when it differs from 'dataLayer'.
            dl = l != 'dataLayer' ? '&l=' + l : '';
        j.async = true;
        // Explicit https: scheme (not protocol-relative) so the container is never
        // requested over plain http or resolved against a file: URL.
        j.src = 'https://www.googletagmanager.com/gtm.js?id=' + i + dl;
        // Insert the loader in front of the first <script> element in the document.
        f.parentNode.insertBefore(j, f);
      })(window, document, 'script', 'dataLayer', 'GTM-58RLH5');
    </script>
    <!-- End Google Tag Manager -->

    <!-- Global site tag (gtag.js) - Google Analytics -->
    <script async src="https://www.googletagmanager.com/gtag/js?id=UA-12395217-16"></script>
    <script>
      // Reuse the data layer if one is already defined; otherwise start an empty one.
      window.dataLayer = window.dataLayer || [];
      // Queue a command by pushing the raw `arguments` object onto the data layer.
      function gtag(){dataLayer.push(arguments);}
      // Queue the 'js' command with the current time.
      gtag('js', new Date());
      // Queue the 'config' command for property UA-12395217-16.
      gtag('config', 'UA-12395217-16');
    </script>

    <!--BEGIN QUALTRICS WEBSITE FEEDBACK SNIPPET-->
    <script type="text/javascript">
      // Minified loader for the Qualtrics site-intercept script.
      // Constructor parameters: e = numeric value compared against 100,
      // h = mode flag ("v" or "r"), f = cookie name, g = URL of the script to load
      // (the parameter g shadows the constructor variable of the same name).
      (function(){var g=function(e,h,f,g){
      // get(a): return the value of cookie `a` from document.cookie, or null if absent.
      // The loop-local `e` here shadows the constructor parameter `e`.
      this.get=function(a){for(var a=a+"=",c=document.cookie.split(";"),b=0,e=c.length;b<e;b++){for(var d=c[b];" "==d.charAt(0);)d=d.substring(1,d.length);if(0==d.indexOf(a))return d.substring(a.length,d.length)}return null};
      // set(a, c): write cookie `a` = `c` with path "/" and an expiry 6048E5 ms (7 days) from now.
      this.set=function(a,c){var b="",b=new Date;b.setTime(b.getTime()+6048E5);b="; expires="+b.toGMTString();document.cookie=a+"="+c+b+"; path=/; "};
      // check(): decide whether the script should be loaded.
      //  - Cookie `f` present: parse it as "mode:value:counter".
      //  - No cookie and e != 100: in "v" mode replace e with 0 or 100 at random, then store "h:e:0".
      //  - No cookie and e == 100: return true immediately.
      // Afterwards: value == 100 -> true; mode "v" -> false; mode "r" -> increment the stored
      // counter and return true only when the previous counter was a multiple of floor(100 / value).
      this.check=function(){var a=this.get(f);if(a)a=a.split(":");else if(100!=e)"v"==h&&(e=Math.random()>=e/100?0:100),a=[h,e,0],this.set(f,a.join(":"));else return!0;var c=a[1];if(100==c)return!0;switch(a[0]){case "v":return!1;case "r":return c=a[2]%Math.floor(100/c),a[2]++,this.set(f,a.join(":")),!c}return!0};
      // go(): when check() passes, append a script element pointing at `g` to document.body (if it exists).
      this.go=function(){if(this.check()){var a=document.createElement("script");a.type="text/javascript";a.src=g;document.body&&document.body.appendChild(a)}};
      // start(): run go() on the window "load" event, via addEventListener or the attachEvent fallback.
      this.start=function(){var a=this;window.addEventListener?window.addEventListener("load",function(){a.go()},!1):window.attachEvent&&window.attachEvent("onload",function(){a.go()})}};
      // Instantiate with e = 100 and mode "r"; any exception thrown during setup is silently discarded.
      try{(new g(100,"r","QSI_S_ZN_emkP0oSe9Qrn7kF","https://znemkp0ose9qrn7kf-elastic.siteintercept.qualtrics.com/WRSiteInterceptEngine/?Q_ZID=ZN_emkP0oSe9Qrn7kF")).start()}catch(i){}})();
    </script><div id="ZN_emkP0oSe9Qrn7kF"><!--DO NOT REMOVE-CONTENTS PLACED HERE--></div>
    <!--END WEBSITE FEEDBACK SNIPPET-->

    <div id="elastic-nav" style="display:none;"></div>
    <script src="https://www.elastic.co/elastic-nav.js"></script>

    <!-- Subnav -->
    <div>
      <div>
        <div class="tertiary-nav d-none d-md-block">
          <div class="container">
            <div class="p-t-b-15 d-flex justify-content-between nav-container">
              <div class="breadcrum-wrapper"><span><a href="/guide/" style="font-size: 14px; font-weight: 600; color: #000;">Docs</a></span></div>
            </div>
          </div>
        </div>
      </div>
    </div>

    <div class="main-container">
      <section id="content">
        <div class="content-wrapper">

          <section id="guide" lang="en">
            <div class="container">
              <div class="row">
                <div class="col-xs-12 col-sm-8 col-md-8 guide-section">
                  <!-- start body -->
                  <div class="page_header">
<p>
  <strong>WARNING</strong>: The 2.x versions of Elasticsearch have passed their
  <a href="https://www.elastic.co/support/eol">EOL dates</a>. If you are running
  a 2.x version, we strongly advise you to upgrade.
</p>
<p>
  This documentation is no longer maintained and may be removed. For the latest
  information, see the <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html">current
  Elasticsearch documentation</a>.
</p>
</div>
<div id="content">
<div class="breadcrumbs">
<span class="breadcrumb-link"><a href="index.html">Elasticsearch: The Definitive Guide [2.x]</a></span>
»
<span class="breadcrumb-link"><a href="getting-started.html">Getting Started</a></span>
»
<span class="breadcrumb-link"><a href="index-management.html">Index Management</a></span>
»
<span class="breadcrumb-node">Custom Analyzers</span>
</div>
<div class="navheader">
<span class="prev">
<a href="configuring-analyzers.html">« Configuring Analyzers</a>
</span>
<span class="next">
<a href="mapping.html">Types and Mappings »</a>
</span>
</div>
<div class="section">
<div class="titlepage"><div><div>
<h2 class="title">
<a id="custom-analyzers"></a>Custom Analyzers<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/070_Index_Mgmt/20_Custom_Analyzers.asciidoc">edit</a>
</h2>
</div></div></div>
<p>While Elasticsearch comes with a number of analyzers available out of the box,
the real power comes from the ability to create your own custom analyzers
by combining character filters, tokenizers, and token filters in a
configuration that suits your particular data.</p>
<p>In <a class="xref" href="analysis-intro.html" title="Analysis and Analyzers">Analysis and Analyzers</a>, we said that an <em>analyzer</em> is a wrapper that combines
three functions into a single package, which are executed in sequence:</p>
<div class="variablelist">
<dl class="variablelist">
<dt>
<span class="term">
Character filters
</span>
</dt>
<dd>
<p>Character filters are used to “tidy up” a string before it is tokenized.
For instance, if our text is in HTML format, it will contain HTML tags like
<code class="literal">&lt;p&gt;</code> or <code class="literal">&lt;div&gt;</code> that we don’t want to be indexed. We can use the
<a href="/guide/en/elasticsearch/reference/2.4/analysis-htmlstrip-charfilter.html" class="ulink" target="_top"><code class="literal">html_strip</code> character filter</a>
to remove all HTML tags and to convert HTML entities like <code class="literal">&amp;Aacute;</code> into the
corresponding Unicode character <code class="literal">Á</code>.</p>
<p>An analyzer may have zero or more character filters.</p>
</dd>
<dt>
<span class="term">
Tokenizers
</span>
</dt>
<dd>
<p>An analyzer <em>must</em> have a single tokenizer.  The tokenizer breaks up the
string into individual terms or tokens. The
<a href="/guide/en/elasticsearch/reference/2.4/analysis-standard-tokenizer.html" class="ulink" target="_top"><code class="literal">standard</code> tokenizer</a>,
which is used in the <code class="literal">standard</code> analyzer, breaks up a string into
individual terms on word boundaries, and removes most punctuation, but
other tokenizers exist that have different behavior.</p>
<p>For instance, the
<a href="/guide/en/elasticsearch/reference/2.4/analysis-keyword-tokenizer.html" class="ulink" target="_top"><code class="literal">keyword</code> tokenizer</a>
outputs exactly the same string as it received, without any tokenization. The
<a href="/guide/en/elasticsearch/reference/2.4/analysis-whitespace-tokenizer.html" class="ulink" target="_top"><code class="literal">whitespace</code> tokenizer</a>
splits text on whitespace only. The
<a href="/guide/en/elasticsearch/reference/2.4/analysis-pattern-tokenizer.html" class="ulink" target="_top"><code class="literal">pattern</code> tokenizer</a> can
be used to split text on a matching regular expression.</p>
</dd>
<dt>
<span class="term">
Token filters
</span>
</dt>
<dd>
<p>After tokenization, the resulting <em>token stream</em> is passed through any
specified token filters, in the order in which they are specified.</p>
<p>Token filters may change, add, or remove tokens.  We have already mentioned the
<a href="/guide/en/elasticsearch/reference/2.4/analysis-lowercase-tokenfilter.html" class="ulink" target="_top"><code class="literal">lowercase</code></a> and
<a href="/guide/en/elasticsearch/reference/2.4/analysis-stop-tokenfilter.html" class="ulink" target="_top"><code class="literal">stop</code> token filters</a>,
but there are many more available in Elasticsearch.
<a href="/guide/en/elasticsearch/reference/2.4/analysis-stemmer-tokenfilter.html" class="ulink" target="_top">Stemming token filters</a>
“stem” words to their root form. The
<a href="/guide/en/elasticsearch/reference/2.4/analysis-asciifolding-tokenfilter.html" class="ulink" target="_top"><code class="literal">asciifolding</code> filter</a>
removes diacritics, converting a term like <code class="literal">"très"</code> into <code class="literal">"tres"</code>. The
<a href="/guide/en/elasticsearch/reference/2.4/analysis-ngram-tokenfilter.html" class="ulink" target="_top"><code class="literal">ngram</code></a> and
<a href="/guide/en/elasticsearch/reference/2.4/analysis-edgengram-tokenfilter.html" class="ulink" target="_top"><code class="literal">edge_ngram</code> token filters</a> can produce
tokens suitable for partial matching or autocomplete.</p>
</dd>
</dl>
</div>
<p>In <a class="xref" href="search-in-depth.html" title="Search in Depth">Search in Depth</a>, we discuss examples of where and how to use these
tokenizers and filters.  But first, we need to explain how to create a custom
analyzer.</p>
<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="_creating_a_custom_analyzer"></a>Creating a Custom Analyzer<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/070_Index_Mgmt/20_Custom_Analyzers.asciidoc">edit</a>
</h3>
</div></div></div>
<p>In the same way as we configured the <code class="literal">es_std</code> analyzer previously, we can configure
character filters, tokenizers, and token filters in their respective sections
under <code class="literal">analysis</code>:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">PUT /my_index
{
    "settings": {
        "analysis": {
            "char_filter": { ... custom character filters ... },
            "tokenizer":   { ...    custom tokenizers     ... },
            "filter":      { ...   custom token filters   ... },
            "analyzer":    { ...    custom analyzers      ... }
        }
    }
}</pre>
</div>
<p>As an example, let’s set up a custom analyzer that will do the following:</p>
<div class="olist orderedlist">
<ol class="orderedlist">
<li class="listitem">
Strip out HTML by using the <code class="literal">html_strip</code> character filter.
</li>
<li class="listitem">
<p>Replace <code class="literal">&amp;</code> characters with <code class="literal">" and "</code>, using a custom <code class="literal">mapping</code>
character filter:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">"char_filter": {
    "&amp;_to_and": {
        "type":       "mapping",
        "mappings": [ "&amp;=&gt; and "]
    }
}</pre>
</div>
</li>
<li class="listitem">
Tokenize words, using the <code class="literal">standard</code> tokenizer.
</li>
<li class="listitem">
Lowercase terms, using the <code class="literal">lowercase</code> token filter.
</li>
<li class="listitem">
<p>Remove a custom list of stopwords, using a custom <code class="literal">stop</code> token filter:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">"filter": {
    "my_stopwords": {
        "type":        "stop",
        "stopwords": [ "the", "a" ]
    }
}</pre>
</div>
</li>
</ol>
</div>
<p>Our analyzer definition combines the predefined tokenizer and filters with the
custom filters that we have configured previously:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">"analyzer": {
    "my_analyzer": {
        "type":           "custom",
        "char_filter":  [ "html_strip", "&amp;_to_and" ],
        "tokenizer":      "standard",
        "filter":       [ "lowercase", "my_stopwords" ]
    }
}</pre>
</div>
<p>To put it all together, the whole <code class="literal">create-index</code> request looks like this:</p>
<div class="pre_wrapper lang-sense">
<pre class="programlisting prettyprint lang-sense">PUT /my_index
{
    "settings": {
        "analysis": {
            "char_filter": {
                "&amp;_to_and": {
                    "type":       "mapping",
                    "mappings": [ "&amp;=&gt; and "]
            }},
            "filter": {
                "my_stopwords": {
                    "type":       "stop",
                    "stopwords": [ "the", "a" ]
            }},
            "analyzer": {
                "my_analyzer": {
                    "type":         "custom",
                    "char_filter":  [ "html_strip", "&amp;_to_and" ],
                    "tokenizer":    "standard",
                    "filter":       [ "lowercase", "my_stopwords" ]
            }}
}}}</pre>
</div>
<div class="sense_widget" data-snippet="snippets/070_Index_Mgmt/20_Custom_analyzer.json"></div>
<p>After creating the index, use the <code class="literal">analyze</code> API to test the new analyzer:</p>
<div class="pre_wrapper lang-sense">
<pre class="programlisting prettyprint lang-sense">GET /my_index/_analyze
{
    "text": "The quick &amp; brown fox",
    "analyzer": "my_analyzer"
}</pre>
</div>
<div class="sense_widget" data-snippet="snippets/070_Index_Mgmt/20_Custom_analyzer.json"></div>
<p>The following abbreviated results show that our analyzer is working correctly:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">{
  "tokens" : [
      { "token" :   "quick",    "position" : 2 },
      { "token" :   "and",      "position" : 3 },
      { "token" :   "brown",    "position" : 4 },
      { "token" :   "fox",      "position" : 5 }
    ]
}</pre>
</div>
<p>The analyzer is not much use unless we tell Elasticsearch where to use it. We
can apply it to a <code class="literal">string</code> field with a mapping such as the following:</p>
<div class="pre_wrapper lang-sense">
<pre class="programlisting prettyprint lang-sense">PUT /my_index/_mapping/my_type
{
    "properties": {
        "title": {
            "type":      "string",
            "analyzer":  "my_analyzer"
        }
    }
}</pre>
</div>
<div class="sense_widget" data-snippet="snippets/070_Index_Mgmt/20_Custom_analyzer.json"></div>
</div>

</div>
<div class="navfooter">
<span class="prev">
<a href="configuring-analyzers.html">« Configuring Analyzers</a>
</span>
<span class="next">
<a href="mapping.html">Types and Mappings »</a>
</span>
</div>
</div>

                  <!-- end body -->
                </div>
                <div class="col-xs-12 col-sm-4 col-md-4" id="right_col">
                  <div id="rtpcontainer" style="display: block;">
                    <div class="mktg-promo">
                      <h3>Most Popular</h3>
                      <ul class="icons">
                        <li class="icon-elasticsearch-white"><a href="https://www.elastic.co/webinars/getting-started-elasticsearch?baymax=default&amp;elektra=docs&amp;storm=top-video">Get Started with Elasticsearch: Video</a></li>
                        <li class="icon-kibana-white"><a href="https://www.elastic.co/webinars/getting-started-kibana?baymax=default&amp;elektra=docs&amp;storm=top-video">Intro to Kibana: Video</a></li>
                        <li class="icon-logstash-white"><a href="https://www.elastic.co/webinars/introduction-elk-stack?baymax=default&amp;elektra=docs&amp;storm=top-video">ELK for Logs &amp; Metrics: Video</a></li>
                      </ul>
                    </div>
                  </div>
                </div>
              </div>
            </div>
          </section>

        </div>


<div id="elastic-footer"></div>
<script src="https://www.elastic.co/elastic-footer.js"></script>
<!-- Footer Section end-->

      </section>
    </div>

<script src="/guide/static/jquery.js"></script>
<script type="text/javascript" src="/guide/static/docs.js"></script>
<script type="text/javascript">
  // Define an empty global state object.
  // NOTE(review): presumably consumed by /guide/static/docs.js — confirm before changing.
  window.initial_state = {}</script>
  </body>
</html>
